{
  "cells": [
    {
      "cell_type": "code",
      "source": [
        "import torch \n",
        "from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline "
      ],
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": "2024-08-20 03:12:24.098444: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n2024-08-20 03:12:24.098474: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n"
        }
      ],
      "execution_count": 1,
      "metadata": {
        "gather": {
          "logged": 1724123553820
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Seed torch's global RNG so any sampling-based step is repeatable across runs.\n",
        "torch.random.manual_seed(0)\n",
        "\n",
        "model = AutoModelForCausalLM.from_pretrained(\n",
        "    \"../phi-3-instruct\",  # local checkpoint directory (relative to the working directory)\n",
        "    # Prefer the GPU, but fall back to CPU so the notebook still runs without CUDA.\n",
        "    device_map=\"cuda\" if torch.cuda.is_available() else \"cpu\",\n",
        "    torch_dtype=\"auto\",  # let transformers pick the dtype from the checkpoint\n",
        "    # NOTE(review): trust_remote_code=True allows Python code shipped with the checkpoint\n",
        "    # to be executed; keep it only for checkpoints from a trusted source.\n",
        "    trust_remote_code=True,\n",
        ")"
      ],
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "text/plain": "Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]",
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "c29888138a4f4714b7f65112733da2d2"
            }
          },
          "metadata": {}
        }
      ],
      "execution_count": 2,
      "metadata": {
        "jupyter": {
          "source_hidden": false,
          "outputs_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "gather": {
          "logged": 1724123630099
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Load the tokenizer from the same local checkpoint directory used for the model.\n",
        "tokenizer = AutoTokenizer.from_pretrained(\n",
        "    \"../phi-3-instruct\",\n",
        ")"
      ],
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": "You are using the default legacy behaviour of the <class 'transformers.models.llama.tokenization_llama_fast.LlamaTokenizerFast'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565 - if you loaded a llama tokenizer from a GGUF file you can ignore this message.\n"
        }
      ],
      "execution_count": 3,
      "metadata": {
        "jupyter": {
          "source_hidden": false,
          "outputs_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "gather": {
          "logged": 1724123630234
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Phi-3 chat format: each turn is \"<|role|>\\n{content}<|end|>\\n\", and the prompt\n",
        "# ends with \"<|assistant|>\\n\" so the model continues with its reply:\n",
        "#\n",
        "#   <|system|>\n",
        "#   You are a helpful assistant.<|end|>\n",
        "#   <|user|>\n",
        "#   Question?<|end|>\n",
        "#   <|assistant|>\n",
        "#\n",
        "# System turn: \"You are my AI assistant; help me answer questions in Chinese.\"\n",
        "# User turn:   \"Do you know Changsha?\"\n",
        "messages = (\n",
        "    \"<|system|>\\n你是我的人工智能助手，协助我用中文解答问题.<|end|>\\n\"\n",
        "    \"<|user|>\\n你知道长沙吗？<|end|>\\n\"\n",
        "    \"<|assistant|>\\n\"\n",
        ")"
      ],
      "outputs": [],
      "execution_count": 9,
      "metadata": {
        "jupyter": {
          "source_hidden": false,
          "outputs_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "gather": {
          "logged": 1724123692174
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Wrap the already-loaded model and tokenizer in a text-generation pipeline.\n",
        "pipe = pipeline(\"text-generation\", model=model, tokenizer=tokenizer)"
      ],
      "outputs": [],
      "execution_count": 10,
      "metadata": {
        "jupyter": {
          "source_hidden": false,
          "outputs_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "gather": {
          "logged": 1724123693760
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Keyword arguments forwarded to the pipeline call.\n",
        "generation_args = {\n",
        "    \"max_new_tokens\": 1024,  # upper bound on the number of generated tokens\n",
        "    \"return_full_text\": False,  # return only the completion, not the prompt\n",
        "    # Greedy decoding. `temperature` only applies when sampling, so it is not set:\n",
        "    # combined with do_sample=False it is ignored and only triggers a warning.\n",
        "    \"do_sample\": False,\n",
        "}"
      ],
      "outputs": [],
      "execution_count": 11,
      "metadata": {
        "jupyter": {
          "source_hidden": false,
          "outputs_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "gather": {
          "logged": 1724123695747
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Run generation on the prompt; the result is read as output[0]['generated_text'].\n",
        "output = pipe(\n",
        "    messages,\n",
        "    **generation_args,\n",
        ")"
      ],
      "outputs": [],
      "execution_count": 12,
      "metadata": {
        "jupyter": {
          "source_hidden": false,
          "outputs_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "gather": {
          "logged": 1724123704229
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Display the text of the first result as the cell's output.\n",
        "output[0][\"generated_text\"]"
      ],
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 8,
          "data": {
            "text/plain": "' 是的，我知道长沙。长沙是中国南部的一座大城市，位于湖南省中部，是中国的国际性大都市。它拥有丰富的历史和文化，曾是明朝的都城。长沙以其繁华的商业、独特的自然风光和丰富的历史遗迹而闻名，如橘子洲、岳麓山和长沙博物院。此外，长沙也是中国重要的科技和教育中心，包括中国工程院和中国科学院长沙分院。'"
          },
          "metadata": {}
        }
      ],
      "execution_count": 8,
      "metadata": {
        "jupyter": {
          "source_hidden": false,
          "outputs_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "gather": {
          "logged": 1724123639263
        }
      }
    }
  ],
  "metadata": {
    "kernelspec": {
      "name": "python38-azureml",
      "language": "python",
      "display_name": "Python 3.8 - AzureML"
    },
    "language_info": {
      "name": "python",
      "version": "3.9.19",
      "mimetype": "text/x-python",
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "pygments_lexer": "ipython3",
      "nbconvert_exporter": "python",
      "file_extension": ".py"
    },
    "microsoft": {
      "ms_spell_check": {
        "ms_spell_check_language": "en"
      },
      "host": {
        "AzureML": {
          "notebookHasBeenCompleted": true
        }
      }
    },
    "kernel_info": {
      "name": "python38-azureml"
    },
    "nteract": {
      "version": "nteract-front-end@1.0.0"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 2
}